LAGOS Analysis

Loading in data

First download the data, then grab the locus table (the site latitudes and longitudes).

# LAGOSNE download script (left commented out)
#lagosne_get(dest_folder = LAGOSNE:::lagos_path(),overwrite=T)

# Load the LAGOSNE tables
lagos <- lagosne_load()
## Warning in (function (version = NULL, fpath = NA) : LAGOSNE version unspecified,
## loading version: 1.087.3

# Locus table: the lake centroid info (includes nhd_long / nhd_lat)
lake_centers <- lagos[["locus"]]

# Convert the centroids to an sf point object in EPSG:4326
spatial_lakes <- lake_centers %>%
  st_as_sf(coords = c("nhd_long", "nhd_lat"), crs = 4326)

# Water quality table
nutr <- lagos[["epi_nutr"]]

# Inspect the available columns
#names(nutr)

Subset the columns of nutr to keep only the key information we want

# Keep the lake id, the sample date and the chla / doc / secchi measurements,
# then re-parse the sample date from its character form with ymd()
clarity_only <- nutr %>%
  select(lagoslakeid, sampledate, chla, doc, secchi) %>%
  mutate(sampledate = ymd(as.character(sampledate)))

Keep sites with at least 200 observations

# Number of rows before filtering
#nrow(clarity_only)

# Keep only samples where both chla and secchi were recorded
chla_secchi <- clarity_only %>%
  filter(!is.na(chla),
         !is.na(secchi))

# How many observations did we lose?
# nrow(clarity_only) - nrow(chla_secchi)


# Keep only the lakes with at least 200 paired observations of secchi and chla.
# "At least 200" is inclusive, so the comparison is >= rather than >.
chla_secchi_200 <- chla_secchi %>%
  group_by(lagoslakeid) %>%
  mutate(count = n()) %>%
  filter(count >= 200) %>%
  # Drop the grouping so later steps do not silently operate per lake
  ungroup()

Join water quality data to spatial data

# Reduce the 200+ observation table to one row per lake, then attach those
# rows to the lake points; only lakes present in both tables are kept
spatial_200 <- chla_secchi_200 %>%
  distinct(lagoslakeid, .keep_all = TRUE) %>%
  inner_join(spatial_lakes, ., by = "lagoslakeid")

Mean Chl_a map

### Take the mean chl_a and secchi by lake

# Per-lake averages of chlorophyll a and Secchi depth, plus log10 of the
# chlorophyll mean for mapping
mean_values_200 <- chla_secchi_200 %>%
  group_by(lagoslakeid) %>%
  summarize(
    mean_chl = mean(chla, na.rm = TRUE),
    mean_secchi = mean(secchi, na.rm = TRUE)
  ) %>%
  # Discard lakes where either mean is missing
  filter(!(is.na(mean_chl) | is.na(mean_secchi))) %>%
  mutate(log10_mean_chl = log10(mean_chl))

# Attach the per-lake means to the lake points
mean_spatial <- spatial_lakes %>%
  inner_join(mean_values_200, by = "lagoslakeid")

# Map the lakes, coloured by log10 mean chlorophyll a
mapview(mean_spatial, zcol = "log10_mean_chl")

Class work

1) What is the correlation between Secchi Disk Depth and Chlorophyll a for sites with at least 200 observations?

Secchi disk depth is negatively correlated with chlorophyll a, and the relationship resembles an exponential decay. As chlorophyll increases, a point is reached where very little light is able to penetrate the water beyond a short distance.

# Question 1 is about lakes with at least 200 observations, so start from
# chla_secchi_200 rather than the unfiltered chla_secchi table.
# Plot mean Secchi depth against mean chlorophyll a, one point per lake.
chla_secchi_200 %>%
  group_by(lagoslakeid) %>%
  summarise(meanchla = mean(chla),
            meansecchi = mean(secchi)) %>%
  ggplot(aes(meanchla, meansecchi)) +
  geom_point()

2) What states have the most data?

2a) First you will need to make a lagos spatial dataset that has the total number of counts per site.

# Count rows per lake / coordinate / state-zone combination.
# NOTE(review): if the locus table holds one row per lake, n is 1 for every
# lake, so the later per-state sums count lakes rather than water quality
# observations - confirm this is the intended "count".
# NOTE(review): this overwrites lake_centers with the summarised (grouped) table.
lake_centers <- lake_centers %>%
  group_by(lagoslakeid, nhd_long, nhd_lat, state_zoneid) %>%
  summarize(n = n())
## `summarise()` has grouped output by 'lagoslakeid', 'nhd_long', 'nhd_lat'. You can override using the `.groups` argument.

2b) Second, you will need to join this point dataset to the us_boundaries data.

# State lookup table from LAGOS
lake_states <- lagos[["state"]]

# Attach the state attributes to each lake row via the state zone id
lake_statecenters <- lake_centers %>%
  left_join(lake_states, by = "state_zoneid")

2c) Then you will want to group by state and sum all the observations in that state and arrange that data from most to least total observations per state.

# Total n per state, rows with missing values removed, largest total first
lake_obsn <- lake_statecenters %>%
  group_by(state_name) %>%
  summarise(n = sum(n)) %>%
  drop_na() %>%
  arrange(desc(n))

# State boundaries, with a state_name column to join on
states <- mutate(us_states(), state_name = name)

kable(lake_obsn)
state_name n
Minnesota 29022
Michigan 15569
Wisconsin 13790
New York 11950
Illinois 11805
Missouri 9116
Indiana 7942
Ohio 6120
Pennsylvania 5922
Maine 5518
Iowa 4636
Massachusetts 3912
New Jersey 3333
New Hampshire 2544
Connecticut 2025
Vermont 1626
Rhode Island 618
# Attach the state boundary attributes to the per-state totals and drop any
# rows with missing values
lake_statecenterboundaries <- lake_obsn %>%
  left_join(states, by = "state_name") %>%
  drop_na()

# Convert the joined table to an sf object so it can be mapped
lake_countmap <- st_as_sf(lake_statecenterboundaries)

# Map the states, coloured by their total n
mapview(lake_countmap, zcol = "n")

3) Is there a spatial pattern in Secchi disk depth for lakes with at least 200 observations?

Lakes that were farther from urban areas tended to have greater Secchi disk depths, indicating clearer water. This could be because nutrient runoff from densely populated areas leads to high chlorophyll a concentrations and therefore lower visibility in the water.

# Attach lake coordinates to the 200+ observation records and build point
# geometries. nhd_long / nhd_lat are longitude / latitude, so declare EPSG:4326
# (the same CRS used when building spatial_lakes); without a CRS the points
# are not georeferenced.
spatial_200 <- chla_secchi_200 %>%
  left_join(lake_centers, by = "lagoslakeid") %>%
  st_as_sf(coords = c("nhd_long", "nhd_lat"), crs = 4326)

# Map the observations, coloured by Secchi depth
mapview(spatial_200, zcol = "secchi")